library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)
RRPLOTS and flchain
odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))
data$`(Intercept)` <- NULL
dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
status=odata[rownames(data),"death"],
data))
pander::pander(table(dataFL$status))
dataFL$time <- dataFL$time/365
Exploring Raw Features with RRPlot
convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
title=topFive[1])


par(op)
## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
timetoEvent=dataFL$time,
atRate=c(0.90,0.80),
title=topf)
idx <- idx + 1
par(op)
}












names(RRanalysis) <- topFive
Reporting the Metrics
pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
| Thr |
73.000 |
69.000 |
68.000 |
54.000 |
50.00000 |
| RR |
4.013 |
4.399 |
4.465 |
5.668 |
1.00000 |
| RR_LCI |
3.740 |
4.045 |
4.099 |
4.559 |
0.00000 |
| RR_UCI |
4.305 |
4.783 |
4.864 |
7.048 |
0.00000 |
| SEN |
0.581 |
0.713 |
0.726 |
0.960 |
1.00000 |
| SPE |
0.883 |
0.790 |
0.779 |
0.255 |
0.00614 |
| BACC |
0.732 |
0.752 |
0.753 |
0.607 |
0.50307 |
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL
for (topf in topFive)
{
CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive
pander::pander(ROCAUC)
| age |
0.822 |
0.811 |
0.834 |
| kappa |
0.682 |
0.667 |
0.697 |
| lambda |
0.665 |
0.650 |
0.680 |
| creatinine |
0.590 |
0.574 |
0.606 |
pander::pander(CstatCI)
| age |
0.775 |
0.775 |
0.765 |
0.785 |
| kappa |
0.671 |
0.671 |
0.658 |
0.683 |
| lambda |
0.657 |
0.657 |
0.645 |
0.669 |
| creatinine |
0.586 |
0.586 |
0.572 |
0.601 |
pander::pander(LogRangp)
| age |
0.00e+00 |
| kappa |
4.90e-175 |
| lambda |
4.41e-145 |
| creatinine |
2.67e-67 |
pander::pander(Sensitivity)
| age |
0.581 |
0.558 |
0.602 |
| kappa |
0.319 |
0.298 |
0.340 |
| lambda |
0.300 |
0.279 |
0.321 |
| creatinine |
0.288 |
0.269 |
0.309 |
pander::pander(Specificity)
| age |
0.883 |
0.873 |
0.892 |
| kappa |
0.900 |
0.891 |
0.908 |
| lambda |
0.899 |
0.890 |
0.907 |
| creatinine |
0.865 |
0.854 |
0.875 |
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
| age |
0.822 |
0.775 |
0.581 |
0.883 |
| kappa |
0.682 |
0.671 |
0.319 |
0.900 |
| lambda |
0.665 |
0.657 |
0.300 |
0.899 |
| creatinine |
0.590 |
0.586 |
0.288 |
0.865 |
Train Test Set
trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]
pander::pander(table(dataFLTrain$status))
pander::pander(table(dataFLTest$status))
Cox Modeling
ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
| age |
0.0923 |
0.0689 |
0.0753 |
0.0814 |
0.723 |
| flc.grp |
0.0669 |
0.0035 |
0.0441 |
0.0896 |
0.603 |
| kappa |
0.1598 |
0.0781 |
0.2152 |
0.3564 |
0.659 |
| creatinine |
0.0284 |
-0.1185 |
0.0551 |
0.2173 |
0.551 |
Table continues below
| age |
0.605 |
0.728 |
0.737 |
0.625 |
0.744 |
| flc.grp |
0.720 |
0.728 |
0.625 |
0.737 |
0.744 |
| kappa |
0.725 |
0.728 |
0.633 |
0.741 |
0.744 |
| creatinine |
0.728 |
0.728 |
0.545 |
0.744 |
0.744 |
| age |
0.183264 |
0.876 |
26.82 |
26.38 |
-0.11905 |
1 |
| flc.grp |
0.005685 |
0.283 |
5.06 |
7.65 |
-0.00739 |
1 |
| kappa |
0.001412 |
0.057 |
2.38 |
1.51 |
-0.00299 |
1 |
| creatinine |
0.000135 |
0.049 |
1.94 |
1.30 |
0.00000 |
1 |
Cox Model Performance
The evaluation of the raw Cox model with RRPlot()
timeinterval <- 5
h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
| 0.139 |
5 |
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
timetoEvent=dataFLTrain$time,
title="Train: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






Time to event
toinclude <- rdata[,1]==1
obstiemToEvent <- dataFLTrain[,"time"]
summary(obstiemToEvent)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000 7.774 11.736 9.946 13.071
14.153
tmin<-min(obstiemToEvent)
if (tmin < 0.01) tmin <- 0.01
tmax<-max(obstiemToEvent)
sum(toinclude)
[1] 1031
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
timetoEvent[is.infinite(timetoEvent)] <- 3*tmax
timetoEvent[timetoEvent > 3*tmax] <- 3*tmax
timetoEvent[timetoEvent < tmin] <- tmin
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.259 |
0.00999 |
26 |
1.08e-114 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 1031 |
5.43 |
0.395 |
0.395 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)
8.61
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time",timeInterval=timeinterval)
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
Time to event after calibration
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.133 |
0.006 |
22.1 |
3.44e-89 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 1031 |
5.75 |
0.323 |
0.322 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)
8.61 and 14.45